Building random forest models using inputs derived from a DEM and a set of landslide initiation points to predict which areas are more susceptible to landslides.

library(terra)
## terra version 1.4.22
library(TerrainWorksUtils)
library(randomForest)
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
library(here)
## here() starts at C:/Work/Source/LandslideSusceptibility
library(ggmap)
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
## 
## Attaching package: 'ggmap'
## The following object is masked from 'package:terra':
## 
##     inset
for (file in list.files(here("helper"))) {
  source(here("helper", file))
}

dataDir <- "E:/NetmapData/Scottsburg"
shade_file <- file.path(dataDir, params$shade_file)
varRasterFiles <- lapply(params$input_rasters, function(x) {
  file.path(dataDir, x)
})
analysis_region_params <- params$analysis_region_parameters
initPointsFile = file.path(dataDir, params$init_points_file)
noninitProportion = params$noninit_proportion
bufferRadius = params$buffer_radius
bufferExtractionMethod = params$buffer_extraction
initRangeExpansion = params$init_range_expansion
iterationsCount = params$iterations_count

Inputs

shade_file: E:/NetmapData/Scottsburg/shade_scottsburg.tif
varRasterFiles: E:/NetmapData/Scottsburg/dev_15.tif, E:/NetmapData/Scottsburg/prof_15.tif, E:/NetmapData/Scottsburg/grad_15.tif, E:/NetmapData/Scottsburg/plan_15.tif, E:/NetmapData/Scottsburg/pca_15m_48hr.flt, E:/NetmapData/Scottsburg/pca_15m_5hr.flt
analysisRegionParams: grad_15, plan_15
initPOintsFile: E:/NetmapData/Scottsburg/Scottsburg_Upslope.shp
noninitProportion: 3
bufferRadius: 30
bufferExtractionMethod: center cell
initRangeExpansion: 0
iterationsCount: 5

The Data

Required inputs: DEM for a region and a set of landslide initiation points. For this exploration, we are using the Scottsburg dataset, which was collected near Scottsburg, OR, including a DEM of the region and 74 landslide initiation points collected in the field.

# get background map
terrain <- terra::rast(
  ggmap::get_map(c(-123.90, 43.5978, -123.80, 43.66), 
                 source = "osm", 
                 messaging = FALSE))
## Source : http://tile.stamen.com/terrain/13/1276/2989.png
## Source : http://tile.stamen.com/terrain/13/1277/2989.png
## Source : http://tile.stamen.com/terrain/13/1278/2989.png
## Source : http://tile.stamen.com/terrain/13/1276/2990.png
## Source : http://tile.stamen.com/terrain/13/1277/2990.png
## Source : http://tile.stamen.com/terrain/13/1278/2990.png
## Source : http://tile.stamen.com/terrain/13/1276/2991.png
## Source : http://tile.stamen.com/terrain/13/1277/2991.png
## Source : http://tile.stamen.com/terrain/13/1278/2991.png
# Get hillshade 
shade <- terra::rast(file.path(dataDir, "shade_scottsburg.tif"))

# Get points
initPoints <- terra::vect(initPointsFile)

# Match projections
terrain <- terra::project(terrain, crs(shade))


plotRGB(terrain)
plot(shade, col=grey(0:100/100), legend = FALSE, add= TRUE)
points(initPoints, cex = 2, col = "#c44a41")

Model inputs

Surface metrics are calculated from the DEM using the DEMutilities toolbox from the ForestedWetlands repo. Here, we use gradient, plan curvature, profile curvature, and elevation deviation at the 15 meter scale, and partial contributing area at the 15-meter scale calculated for a 5 hour and 48 hour period. This gives us an 6 input rasters.

We also need a set of non-initiation training points. To derive a set of meaningful non-initation points, we first exclude all points outside of the range of values where landslides can be found based on the range of input values for all landslide initation points. This gives us our “analysis region.”

varRasterList <- lapply(varRasterFiles, function(file) terra::rast(file))

# Align variable rasters
varRasterList <- TerrainWorksUtils::alignRasters(shade, varRasterList)

# Combine variable rasters into a single multi-layer raster
varsRaster <- terra::rast(varRasterList)
initBuffers <- terra::buffer(initPoints, width = bufferRadius)

if (length(analysis_region_params) > 0) {
  
  # Calculate initiation range for each variable
  initRange <- createInitiationRange(
    terra::subset(varsRaster, analysis_region_params),
    initBuffers,
    initRangeExpansion
  )
  initRange
  
  # Identify cells in the study region that have variable values within their 
  # initiation ranges
  initRaster <- terra::subset(varsRaster, analysis_region_params)
  for (varName in names(initRaster)) {
    varRaster <- initRaster[[varName]]
    
    # Get variable value limits
    minInitValue <- initRange[varName, "min"]
    maxInitValue <- initRange[varName, "max"]
    
    # NA-out cells with values outside variable initiation range
    varInitRaster <- terra::app(varRaster, function(x) {
      ifelse(x < minInitValue | x > maxInitValue, NA, x)
    })
    
    # Update the raster in the input raster stack
    initRaster[[varName]] <- varInitRaster
  }
  
  # NA-out cells with ANY variable value outside its initiation range
  analysisRegionMask <- all(initRaster)
  
} else {
  analysisRegionMask <- all(varsRaster)
}

We expanded the initiation range by 0 percent.

if (length(analysis_region_params) > 0) {
  for (layer in names(initRaster)) {
    plot(!is.na(initRaster[layer]), 
         main = layer, 
         axes = FALSE)
  }
}

plot(analysisRegionMask, 
     main = "Analysis Region", 
     axes = FALSE)

# analysisCountRaster <- terra::app(initRaster, 
                                  # function(x) sum(!is.na(x)))
# plot(analysisCountRaster, 
#      col = c(RColorBrewer::brewer.pal( 
#        length(unique(values(analysisCountRaster)))-1, 
#        "YlGnBu"),
#        "#eb972a"
#      ), 
#      axes = FALSE) 

We then draw a buffer around initiation points and randomly sample non-initiation points from the remaining analysis region.

# Double the size of the initiation buffers
expInitBuffers <- terra::buffer(initPoints, 
                                width = bufferRadius * 2)

# Remove expanded initiation buffers from the viable non-initiation region
noninitRegion <- terra::copy(analysisRegionMask)
initCellIndices <- terra::extract(noninitRegion, 
                                  expInitBuffers, 
                                  cells = TRUE)$cell
noninitRegion[initCellIndices] <- NA

# Determine how many to generate
noninitBuffersCount <- ceiling(length(initPoints) * noninitProportion)

noninitBuffers <- generateNoninitiationBuffers(
  noninitBuffersCount,
  noninitRegion,
  bufferRadius
)
## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested
plotRGB(terrain)
plot(shade, col=grey(0:100/100), legend = FALSE, add= TRUE)
#plot(analysisRegionMask, add = TRUE, col = "#377eb8")
points(initBuffers, col = "#fb8072")
points(noninitBuffers, col = "#a6d854")
legend("topright", 
       legend = c("Analysis Region", 
                  "Initiation Points", 
                  "Non-initation Points"), 
       fill = c("#377eb8", NA, NA), 
       border = c(1, "transparent", "transparent"),
       col = c(NA, "#fb8072", "#a6d854"), 
       pch = c(NA, 16, 16), 
       pt.cex = c(1, 2, 2)
)

After generating the non-initiation points, we can extract input values from the rasters to create our training data.

# Get training data
trainingData <- extractBufferValues(
  varsRaster,
  initBuffers,
  noninitBuffers,
  bufferExtractionMethod
)

# Remove coordinates from data
coordsCols <- names(trainingData) %in% c("x", "y")  
trainingData <- trainingData[,!coordsCols]
trainingData_no_y <- trainingData[, !grepl("class", names(trainingData))]

cor(trainingData_no_y[trainingData$class == "initiation",])
##                  dev_15    prof_15     grad_15    plan_15 pca_15m_48hr
## dev_15        1.0000000  0.7362993  0.34397764 -0.6443822  -0.53568435
## prof_15       0.7362993  1.0000000  0.30833727 -0.3634900  -0.21530733
## grad_15       0.3439776  0.3083373  1.00000000 -0.1783508  -0.04878078
## plan_15      -0.6443822 -0.3634900 -0.17835078  1.0000000   0.80307085
## pca_15m_48hr -0.5356843 -0.2153073 -0.04878078  0.8030709   1.00000000
## pca_15m_5hr   0.1158157  0.1530577  0.63672171  0.1379310   0.23944724
##              pca_15m_5hr
## dev_15         0.1158157
## prof_15        0.1530577
## grad_15        0.6367217
## plan_15        0.1379310
## pca_15m_48hr   0.2394472
## pca_15m_5hr    1.0000000
cor(trainingData_no_y[trainingData$class == "non-initiation",])
##                   dev_15     prof_15     grad_15      plan_15 pca_15m_48hr
## dev_15        1.00000000  0.42208357  0.30645502 -0.564268244   -0.3730377
## prof_15       0.42208357  1.00000000 -0.15324743 -0.136460753    0.1652117
## grad_15       0.30645502 -0.15324743  1.00000000 -0.087208619   -0.2915351
## plan_15      -0.56426824 -0.13646075 -0.08720862  1.000000000    0.5435034
## pca_15m_48hr -0.37303773  0.16521169 -0.29153508  0.543503439    1.0000000
## pca_15m_5hr   0.03656099  0.03941897  0.26642234  0.005599802    0.1674090
##              pca_15m_5hr
## dev_15       0.036560985
## prof_15      0.039418965
## grad_15      0.266422342
## plan_15      0.005599802
## pca_15m_48hr 0.167409000
## pca_15m_5hr  1.000000000
panel.hist <- function(x, ...) {
    usr <- par("usr")
    on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5))
    his <- hist(x, plot = FALSE)
    breaks <- his$breaks
    nB <- length(breaks)
    y <- his$counts
    y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, ...)
    # lines(density(x), col = 2, lwd = 2) # Uncomment to add density lines
}
pairs(trainingData_no_y[trainingData$class == "initiation",], 
      upper.panel = NULL, 
      diag.panel = panel.hist, 
      main = "Initation points")

pairs(trainingData_no_y[trainingData$class == "non-initiation",], 
      upper.panel = NULL, 
      diag.panel = panel.hist, 
      main = "Non-initation points")

pairs(trainingData_no_y, 
      upper.panel = NULL, 
      diag.panel = panel.hist, 
      main = "All points", 
      col = ifelse(trainingData$class == "initiation", 1, 2))
legend("topright", 
       legend = c("initiation", "non-initiation"), 
       pch = 1, 
       col = c(1, 2))

Random forest model

Next, we fit a random forest model on the training data, and examine some model stats.

rfModel <- randomForest(
  formula = class ~ .,
  data = trainingData
)
plot(rfModel)

Error rate

errorRateDf <- data.frame(rfModel$err.rate[rfModel$ntree,])
colnames(errorRateDf) <- "error rate"
errorRateDf
##                error rate
## OOB            0.12500000
## initiation     0.39189189
## non-initiation 0.03603604

Confusion Matrix

rownames(rfModel$confusion) <- c("true initiation", "true non-initiation")
rfModel$confusion
##                     initiation non-initiation class.error
## true initiation             45             29  0.39189189
## true non-initiation          8            214  0.03603604

Variable importance

imp <- importance(rfModel)
imp
##              MeanDecreaseGini
## dev_15              19.103924
## prof_15              8.329492
## grad_15             16.262225
## plan_15             46.206514
## pca_15m_48hr         9.420284
## pca_15m_5hr         11.909237
varImpPlot(rfModel)

Partial Depentence

These plots examine the partial dependence of the model outcome on each variable, plotted in order of greatest to least importance. Higher values indicate that those values for a variable are more likely to result in classifying a point as an initiation point. 0

vars_by_importance <- rownames(imp)[order(imp[, 1], decreasing = TRUE)]
for (variable in vars_by_importance) {
  partialPlot(rfModel, 
              trainingData, 
              eval(variable), 
              xlab = variable, 
              main = paste("Partial Dependence on", variable))
}

Non-initation Points

Let’s see how the outcome varies depending on the non-initiation points.

iterations <- 1:params$iterations_count

trainingDataList <- lapply(iterations, function(x) {
  
  noninitBuffers <- generateNoninitiationBuffers(
    noninitBuffersCount,
    noninitRegion,
    bufferRadius
  )
  
  trainingData <- extractBufferValues(
    varsRaster,
    initBuffers,
    noninitBuffers,
    bufferExtractionMethod
  )
  
  # Remove coordinates from data
  coordsCols <- names(trainingData) %in% c("x", "y")  
  trainingData[,!coordsCols]
  
})
## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested

## Warning: [spatSample] fewer cells returned than requested
rfModelList <- lapply(trainingDataList, function(d) {
  randomForest(
    formula = class ~ .,
    data = d
  )
})

for (i in iterations) {
  cat("MODEL ", i)
  trainingData <- trainingDataList[[i]]
  trainingData_no_y <- trainingData[, 1:6]
  rfModel <- rfModelList[[i]]
  pairs(trainingData_no_y[trainingData$class == "non-initiation",], 
      upper.panel = NULL, 
      diag.panel = panel.hist, 
      main = "Non-initation points")
  cat("\n\nERROR RATE\n")
  errorRateDf <- data.frame(rfModel$err.rate[rfModel$ntree,])
  colnames(errorRateDf) <- "error rate"
  cat(paste0(capture.output(errorRateDf), collapse = "\n"))
  cat("\n\nCONFUSION MATRIX\n")
  rownames(rfModel$confusion) <- c("true initiation", "true non-initiation")
  cat(paste0(capture.output(rfModel$confusion), collapse = "\n"))
  cat("\n\nIMPORTANCE\n")
  imp <- importance(rfModel)
  cat(paste0(capture.output(imp), collapse = "\n"))
  varImpPlot(rfModel)
  vars_by_importance <- rownames(imp)[order(imp[, 1], decreasing = TRUE)]
  par(mfrow = c(3,2))
  for (variable in vars_by_importance) {
    partialPlot(rfModel, 
                trainingData, 
                eval(variable), 
                xlab = variable, 
                main = variable)
  }
  par(mfrow = c(1,1))
}
## MODEL  1

## 
## 
## ERROR RATE
##                error rate
## OOB            0.11525424
## initiation     0.32432432
## non-initiation 0.04524887
## 
## CONFUSION MATRIX
##                     initiation non-initiation class.error
## true initiation             50             24  0.32432432
## true non-initiation         10            211  0.04524887
## 
## IMPORTANCE
##              MeanDecreaseGini
## dev_15              17.848251
## prof_15              8.869807
## grad_15             15.273370
## plan_15             50.814374
## pca_15m_48hr         9.434156
## pca_15m_5hr          8.588192

## MODEL  2

## 
## 
## ERROR RATE
##                error rate
## OOB            0.12585034
## initiation     0.36486486
## non-initiation 0.04545455
## 
## CONFUSION MATRIX
##                     initiation non-initiation class.error
## true initiation             47             27  0.36486486
## true non-initiation         10            210  0.04545455
## 
## IMPORTANCE
##              MeanDecreaseGini
## dev_15              16.139102
## prof_15              8.830830
## grad_15             16.134941
## plan_15             50.779926
## pca_15m_48hr         9.129391
## pca_15m_5hr          9.025634

## MODEL  3

## 
## 
## ERROR RATE
##                error rate
## OOB            0.11525424
## initiation     0.36486486
## non-initiation 0.03167421
## 
## CONFUSION MATRIX
##                     initiation non-initiation class.error
## true initiation             47             27  0.36486486
## true non-initiation          7            214  0.03167421
## 
## IMPORTANCE
##              MeanDecreaseGini
## dev_15              15.801922
## prof_15              9.205357
## grad_15             16.196440
## plan_15             50.945816
## pca_15m_48hr         9.348049
## pca_15m_5hr          9.189982

## MODEL  4

## 
## 
## ERROR RATE
##                error rate
## OOB            0.10810811
## initiation     0.35135135
## non-initiation 0.02702703
## 
## CONFUSION MATRIX
##                     initiation non-initiation class.error
## true initiation             48             26  0.35135135
## true non-initiation          6            216  0.02702703
## 
## IMPORTANCE
##              MeanDecreaseGini
## dev_15              16.285885
## prof_15              8.810138
## grad_15             15.413026
## plan_15             49.604670
## pca_15m_48hr        10.334452
## pca_15m_5hr          9.862828

## MODEL  5

## 
## 
## ERROR RATE
##                error rate
## OOB            0.10847458
## initiation     0.36486486
## non-initiation 0.02262443
## 
## CONFUSION MATRIX
##                     initiation non-initiation class.error
## true initiation             47             27  0.36486486
## true non-initiation          5            216  0.02262443
## 
## IMPORTANCE
##              MeanDecreaseGini
## dev_15              17.057033
## prof_15              9.003330
## grad_15             16.464679
## plan_15             49.783096
## pca_15m_48hr         9.708581
## pca_15m_5hr          9.130698

Probability Raster

After fitting a random forest model, we can create a probability raster by fitting the data in the analysis region to the model. We do that for each iteration, and use the average of all values.

analysisAreaVarsRaster <- terra::mask(varsRaster, analysisRegionMask)
probRasterList <- lapply(rfModelList, function(model) {
  terra::predict(
    analysisAreaVarsRaster,
    model,
    na.rm = TRUE,
    type = "prob"
  )[["initiation"]]
})
probRasterList <- terra::rast(probRasterList)
mean_prob_raster <- terra::app(probRasterList, mean)
min_prob_raster <- terra::app(probRasterList, min)
max_prob_raster <- terra::app(probRasterList, max)
diff_prob_raster <- max_prob_raster - min_prob_raster
rm(probRasterList)

plotRGB(terrain)
plot(shade, col=grey(0:100/100), add = TRUE)
plot(mean_prob_raster, 
     col = colorRampPalette(c("#56d663", "#ba5123", "#9e2509"))(25), 
     add = TRUE)
legend("left", 
       legend = c(rep(NA, 3), "0.9", rep(NA, 23), "0"), 
       fill = c(rep("transparent", 3), colorRampPalette(c("#9e2509", "#ba5123", "#56d663"))(25)), 
       y.intersp = 0.5, 
       border = "transparent", 
       title = "Mean Probability")

Proportion Raster

We can calculate a proportion raster from the probability raster, and use it to see, for example, 50% of landslides are predicted to occur in the green region plotted below.

prop_raster <- createPropRaster(mean_prob_raster)

plotRGB(terrain)
plot(shade, col=grey(0:100/100), add = TRUE)
top50 <- terra::app(prop_raster, function(x) x >= 0.5)
plot(top50, add = TRUE, col = c("transparent", "green"))

Alternatively, the other 50% of landslides are predicted to occur in the rest of the region plotted below. These are the parts of the analysis region with lower probability of landslides.

plotRGB(terrain)
plot(shade, col=grey(0:100/100), add = TRUE)
plot(top50, add = TRUE, col = c("green", "transparent"))